Finish merging the 2.4/2.6 blkdev backend drivers.
3e6377f8Me8IqtvEhb70XFgOvqQH7A linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
4083dc16z0jvZEH4PiVDbDRreaNp6w linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/Makefile
4083dc16KQus88a4U3uCV6qVCA6_8Q linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/backend/Makefile
-4087cf0dPeHOvzmZAazvwLslKEF93A linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/backend/common.h
-4087cf0dkVF3I19gpT1cNubeJgQr7g linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/backend/main.c
-4087cf0dlv1Dw4MAbeRStPPG8IvPPg linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/backend/vbd.c
4075806dI5kfeMD5RV-DA0PYoThx_w linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile
4075806d4-j7vN0Mn0bklI1cRUX1vQ linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/common.h
4075806d3fJqqDC1pYYPTZPc575iKg linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/main.c
+++ /dev/null
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/common.h
- */
-
-#ifndef __BLKIF__BACKEND__COMMON_H__
-#define __BLKIF__BACKEND__COMMON_H__
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/rbtree.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/ctrl_if.h>
-#include <asm/io.h>
-#include <asm-xen/hypervisor-ifs/io/blkif.h>
-
-#if 0
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#define PRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-
-typedef struct blkif_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- int irq;
- /* Comms information. */
- blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
- BLKIF_RING_IDX blk_req_cons; /* Request consumer. */
- BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */
- /* VBDs attached to this interface. */
- rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
- spinlock_t vbd_lock; /* Protects VBD mapping. */
- /* Private fields. */
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
- struct blkif_st *hash_next;
- struct list_head blkdev_list;
- spinlock_t blk_ring_lock;
- atomic_t refcnt;
-} blkif_t;
-
-void blkif_create(blkif_be_create_t *create);
-void blkif_destroy(blkif_be_destroy_t *destroy);
-void blkif_connect(blkif_be_connect_t *connect);
-int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
-void __blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- __blkif_disconnect_complete(_b); \
- } while (0)
-
-/* An entry in a list of xen_extents. */
-typedef struct _blkif_extent_le {
- blkif_extent_t extent; /* an individual extent */
- struct _blkif_extent_le *next; /* and a pointer to the next */
-} blkif_extent_le_t;
-
-typedef struct _vbd {
- blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
- unsigned char readonly; /* Non-zero -> read-only */
- unsigned char type; /* VDISK_TYPE_xxx */
- blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */
- rb_node_t rb; /* for linking into R-B tree lookup struct */
-} vbd_t;
-
-void vbd_create(blkif_be_vbd_create_t *create);
-void vbd_grow(blkif_be_vbd_grow_t *grow);
-void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
-void vbd_destroy(blkif_be_vbd_destroy_t *delete);
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
-void destroy_all_vbds(blkif_t *blkif);
-
-/* Describes a [partial] disk extent (part of a block io request) */
-typedef struct {
- unsigned short dev;
- unsigned short nr_sects;
- unsigned long buffer;
- blkif_sector_t sector_number;
-} phys_seg_t;
-
-int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation);
-
-void blkif_interface_init(void);
-void blkif_ctrlif_init(void);
-
-void blkif_deschedule(blkif_t *blkif);
-
-void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-
-#endif /* __BLKIF__BACKEND__COMMON_H__ */
+++ /dev/null
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/main.c
- *
- * Back-end of the driver for virtual block devices. This portion of the
- * driver exports a 'unified' block-device interface that can be accessed
- * by any operating system that implements a compatible front end. A
- * reference front-end implementation can be found in:
- * arch/xen/drivers/blkif/frontend
- *
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-/*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
- *
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
- */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-/*
- * NB. We place a page of padding between each buffer page to avoid incorrect
- * merging of requests by the IDE and SCSI merging routines. Otherwise, two
- * adjacent buffers in a scatter-gather request would have adjacent page
- * numbers: since the merge routines don't realise that this is in *pseudophys*
- * space, not real space, they may collapse the s-g elements!
- */
-static unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
- (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * 2 * PAGE_SIZE))
-
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
- */
-typedef struct {
- blkif_t *blkif;
- unsigned long id;
- int nr_pages;
- atomic_t pendcnt;
- unsigned short operation;
- int status;
-} pending_req_t;
-
-/*
- * We can't allocate pending_req's in order, since they may complete out of
- * order. We therefore maintain an allocation ring. This ring also indicates
- * when enough work has been passed down -- at that point the allocation ring
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static kmem_cache_t *buffer_head_cachep;
-
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
-static void make_response(blkif_t *blkif, unsigned long id,
- unsigned short op, int st);
-
-static void fast_flush_area(int idx, int nr_pages)
-{
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
- int i;
-
- for ( i = 0; i < nr_pages; i++ )
- {
- mcl[i].op = __HYPERVISOR_update_va_mapping;
- mcl[i].args[0] = MMAP_VADDR(idx, i) >> PAGE_SHIFT;
- mcl[i].args[1] = 0;
- mcl[i].args[2] = 0;
- }
-
- mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
- BUG();
-}
-
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head io_schedule_list;
-static spinlock_t io_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
- return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
- unsigned long flags;
- if ( !__on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&io_schedule_list_lock, flags);
- if ( __on_blkdev_list(blkif) )
- {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = NULL;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&io_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
- unsigned long flags;
- if ( __on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&io_schedule_list_lock, flags);
- if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
- {
- list_add_tail(&blkif->blkdev_list, &io_schedule_list);
- blkif_get(blkif);
- }
- spin_unlock_irqrestore(&io_schedule_list_lock, flags);
-}
-
-
-/******************************************************************
- * SCHEDULER FUNCTIONS
- */
-
-static DECLARE_WAIT_QUEUE_HEAD(io_schedule_wait);
-
-static int io_schedule(void *arg)
-{
- DECLARE_WAITQUEUE(wq, current);
-
- blkif_t *blkif;
- struct list_head *ent;
-
- for ( ; ; )
- {
- /* Wait for work to do. */
- add_wait_queue(&io_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&io_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&io_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&io_schedule_list) )
- {
- ent = io_schedule_list.next;
- blkif = list_entry(ent, blkif_t, blkdev_list);
- blkif_get(blkif);
- remove_from_blkdev_list(blkif);
- if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
- add_to_blkdev_list_tail(blkif);
- blkif_put(blkif);
- }
-
- /* Push the batch through to disc. */
- run_task_queue(&tq_disk);
- }
-}
-
-static void maybe_trigger_io_schedule(void)
-{
- /*
- * Needed so that two processes, who together make the following predicate
- * true, don't both read stale values and evaluate the predicate
- * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
- */
- smp_mb();
-
- if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
- !list_empty(&io_schedule_list) )
- wake_up(&io_schedule_wait);
-}
-
-
-
-/******************************************************************
- * COMPLETION CALLBACK -- Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
-{
- unsigned long flags;
-
- /* An error fails the entire request. */
- if ( !uptodate )
- {
- DPRINTK("Buffer not up-to-date at end of operation\n");
- pending_req->status = BLKIF_RSP_ERROR;
- }
-
- if ( atomic_dec_and_test(&pending_req->pendcnt) )
- {
- int pending_idx = pending_req - pending_reqs;
- fast_flush_area(pending_idx, pending_req->nr_pages);
- make_response(pending_req->blkif, pending_req->id,
- pending_req->operation, pending_req->status);
- blkif_put(pending_req->blkif);
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
- maybe_trigger_io_schedule();
- }
-}
-
-static void end_block_io_op(struct buffer_head *bh, int uptodate)
-{
- __end_block_io_op(bh->b_private, uptodate);
- kmem_cache_free(buffer_head_cachep, bh);
-}
-
-
-
-/******************************************************************************
- * NOTIFICATION FROM GUEST OS.
- */
-
-void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-{
- blkif_t *blkif = dev_id;
- add_to_blkdev_list_tail(blkif);
- maybe_trigger_io_schedule();
-}
-
-
-
-/******************************************************************
- * DOWNWARD CALLS -- These interface with the block-device layer proper.
- */
-
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
-{
- blkif_ring_t *blk_ring = blkif->blk_ring_base;
- blkif_request_t *req;
- BLKIF_RING_IDX i;
- int more_to_do = 0;
-
- /* Take items off the comms ring, taking care not to overflow. */
- for ( i = blkif->blk_req_cons;
- (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) !=
- BLKIF_RING_SIZE);
- i++ )
- {
- if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
- {
- more_to_do = 1;
- break;
- }
-
- req = &blk_ring->ring[MASK_BLKIF_IDX(i)].req;
- switch ( req->operation )
- {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- dispatch_rw_block_io(blkif, req);
- break;
-
- case BLKIF_OP_PROBE:
- dispatch_probe(blkif, req);
- break;
-
- default:
- DPRINTK("error: unknown block io operation [%d]\n",
- blk_ring->ring[i].req.operation);
- make_response(blkif, blk_ring->ring[i].req.id,
- blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
- break;
- }
- }
-
- blkif->blk_req_cons = i;
- return more_to_do;
-}
-
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
-{
- int rsp = BLKIF_RSP_ERROR;
- int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
- /* We expect one buffer only. */
- if ( unlikely(req->nr_segments != 1) )
- goto out;
-
- /* Make sure the buffer is page-sized. */
- if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
- (blkif_last_sect(req->frame_and_sects[0]) != 7) )
- goto out;
-
- if ( HYPERVISOR_update_va_mapping_otherdomain(
- MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
- (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
- 0, blkif->domid) )
- goto out;
-
- rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
- PAGE_SIZE / sizeof(vdisk_t));
-
- out:
- fast_flush_area(pending_idx, 1);
- make_response(blkif, req->id, req->operation, rsp);
-}
-
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
-{
- extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
- struct buffer_head *bh;
- int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
- short nr_sects;
- unsigned long buffer, fas;
- int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
- unsigned long remap_prot;
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-
- /* We map virtual scatter/gather segments to physical segments. */
- int new_segs, nr_psegs = 0;
- phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1];
-
- /* Check that number of segments is sane. */
- if ( unlikely(req->nr_segments == 0) ||
- unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
- {
- DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
- goto bad_descriptor;
- }
-
- /*
- * Check each address/size pair is sane, and convert into a
- * physical device and block offset. Note that if the offset and size
- * crosses a virtual extent boundary, we may end up with more
- * physical scatter/gather segments than virtual segments.
- */
- for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
- {
- fas = req->frame_and_sects[i];
- buffer = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
- nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
-
- if ( nr_sects <= 0 )
- goto bad_descriptor;
-
- phys_seg[nr_psegs].dev = req->device;
- phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
- phys_seg[nr_psegs].buffer = buffer;
- phys_seg[nr_psegs].nr_sects = nr_sects;
-
- /* Translate the request into the relevant 'physical device' */
- new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
- if ( new_segs < 0 )
- {
- DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
- operation == READ ? "read" : "write",
- req->sector_number + tot_sects,
- req->sector_number + tot_sects + nr_sects,
- req->device);
- goto bad_descriptor;
- }
-
- nr_psegs += new_segs;
- ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1));
- }
-
- /* Nonsensical zero-sized request? */
- if ( unlikely(nr_psegs == 0) )
- goto bad_descriptor;
-
- if ( operation == READ )
- remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
- else
- remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
-
- for ( i = 0; i < nr_psegs; i++ )
- {
- mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
- mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
- mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot;
- mcl[i].args[2] = 0;
- mcl[i].args[3] = blkif->domid;
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- phys_seg[i].buffer >> PAGE_SHIFT;
- }
-
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
- BUG();
-
- for ( i = 0; i < nr_psegs; i++ )
- {
- if ( unlikely(mcl[i].args[5] != 0) )
- {
- DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(pending_idx, nr_psegs);
- goto bad_descriptor;
- }
- }
-
- pending_req = &pending_reqs[pending_idx];
- pending_req->blkif = blkif;
- pending_req->id = req->id;
- pending_req->operation = operation;
- pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nr_psegs;
- atomic_set(&pending_req->pendcnt, nr_psegs);
- pending_cons++;
-
- blkif_get(blkif);
-
- /* Now we pass each segment down to the real blkdev layer. */
- for ( i = 0; i < nr_psegs; i++ )
- {
- bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
- if ( unlikely(bh == NULL) )
- {
- __end_block_io_op(pending_req, 0);
- continue;
- }
- memset(bh, 0, sizeof (struct buffer_head));
-
- init_waitqueue_head(&bh->b_wait);
- bh->b_size = phys_seg[i].nr_sects << 9;
- bh->b_dev = phys_seg[i].dev;
- bh->b_rdev = phys_seg[i].dev;
- bh->b_rsector = (unsigned long)phys_seg[i].sector_number;
- bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
- (phys_seg[i].buffer & ~PAGE_MASK);
- bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i));
- bh->b_end_io = end_block_io_op;
- bh->b_private = pending_req;
-
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
- (1 << BH_Req) | (1 << BH_Launder);
- if ( operation == WRITE )
- bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
-
- atomic_set(&bh->b_count, 1);
-
- /* Dispatch a single request. We'll flush it to disc later. */
- generic_make_request(operation, bh);
- }
-
- return;
-
- bad_descriptor:
- make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
-}
-
-
-
-/******************************************************************
- * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
- */
-
-
-static void make_response(blkif_t *blkif, unsigned long id,
- unsigned short op, int st)
-{
- blkif_response_t *resp;
- unsigned long flags;
-
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&blkif->blk_ring_lock, flags);
- resp = &blkif->blk_ring_base->
- ring[MASK_BLKIF_IDX(blkif->blk_resp_prod)].resp;
- resp->id = id;
- resp->operation = op;
- resp->status = st;
- wmb();
- blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
- spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
-
- /* Kick the relevant domain. */
- notify_via_evtchn(blkif->evtchn);
-}
-
-void blkif_deschedule(blkif_t *blkif)
-{
- remove_from_blkdev_list(blkif);
-}
-
-static int __init blkif_init(void)
-{
- int i;
-
- if ( !(start_info.flags & SIF_INITDOMAIN)
- && !(start_info.flags & SIF_BLK_BE_DOMAIN) )
- return 0;
-
- blkif_interface_init();
-
- if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
- BUG();
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
- memset(pending_reqs, 0, sizeof(pending_reqs));
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- pending_ring[i] = i;
-
- spin_lock_init(&io_schedule_list_lock);
- INIT_LIST_HEAD(&io_schedule_list);
-
- if ( kernel_thread(io_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
- BUG();
-
- buffer_head_cachep = kmem_cache_create(
- "buffer_head_cache", sizeof(struct buffer_head),
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-
- blkif_ctrlif_init();
-
- return 0;
-}
-
-__initcall(blkif_init);
+++ /dev/null
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/vbd.c
- *
- * Routines for managing virtual block devices (VBDs).
- *
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-void vbd_create(blkif_be_vbd_create_t *create)
-{
- vbd_t *vbd;
- rb_node_t **rb_p, *rb_parent = NULL;
- blkif_t *blkif;
- blkif_vdev_t vdevice = create->vdevice;
-
- blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- PRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n",
- create->domid, create->blkif_handle);
- create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- spin_lock(&blkif->vbd_lock);
-
- rb_p = &blkif->vbd_rb.rb_node;
- while ( *rb_p != NULL )
- {
- rb_parent = *rb_p;
- vbd = rb_entry(rb_parent, vbd_t, rb);
- if ( vdevice < vbd->vdevice )
- {
- rb_p = &rb_parent->rb_left;
- }
- else if ( vdevice > vbd->vdevice )
- {
- rb_p = &rb_parent->rb_right;
- }
- else
- {
- PRINTK("vbd_create attempted for already existing vbd\n");
- create->status = BLKIF_BE_STATUS_VBD_EXISTS;
- goto out;
- }
- }
-
- if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
- {
- PRINTK("vbd_create: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- goto out;
- }
-
- vbd->vdevice = vdevice;
- vbd->readonly = create->readonly;
- vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
- vbd->extents = NULL;
-
- rb_link_node(&vbd->rb, rb_parent, rb_p);
- rb_insert_color(&vbd->rb, &blkif->vbd_rb);
-
- DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
- vdevice, create->domid);
- create->status = BLKIF_BE_STATUS_OKAY;
-
- out:
- spin_unlock(&blkif->vbd_lock);
-}
-
-
-/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
-void vbd_grow(blkif_be_vbd_grow_t *grow)
-{
- blkif_t *blkif;
- blkif_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- blkif_vdev_t vdevice = grow->vdevice;
- unsigned long sz;
-
-
- blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- PRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n",
- grow->domid, grow->blkif_handle);
- grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- spin_lock(&blkif->vbd_lock);
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- break;
- }
-
- if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
- {
- PRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
- grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- goto out;
- }
-
- if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
- GFP_KERNEL)) == NULL) )
- {
- PRINTK("vbd_grow: out of memory\n");
- grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- goto out;
- }
-
- x->extent.device = grow->extent.device;
- x->extent.sector_start = grow->extent.sector_start;
- x->extent.sector_length = grow->extent.sector_length;
- x->next = (blkif_extent_le_t *)NULL;
-
- if( !blk_size[MAJOR(x->extent.device)] )
- {
- PRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
-
- /* convert blocks (1KB) to sectors */
- sz = blk_size[MAJOR(x->extent.device)][MINOR(x->extent.device)] * 2;
-
- if ( sz == 0 )
- {
- PRINTK("vbd_grow: device %08x zero size!\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
-
- if ( x->extent.sector_start > 0 )
- {
- PRINTK("vbd_grow: device %08x start not zero!\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
-
- /*
- * NB. This test assumes sector_start == 0, which is always the case
- * in Xen 1.3. In fact the whole grow/shrink interface could do with
- * some simplification.
- */
- if ( x->extent.sector_length > sz )
- x->extent.sector_length = sz;
-
- DPRINTK("vbd_grow: requested_len %llu actual_len %lu\n",
- x->extent.sector_length, sz);
-
- for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
- continue;
-
- *px = x;
-
- DPRINTK("Successful grow of vdev=%04x (dom=%u)\n",
- vdevice, grow->domid);
-
- grow->status = BLKIF_BE_STATUS_OKAY;
-
- out:
- spin_unlock(&blkif->vbd_lock);
-}
-
-
-void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
-{
- blkif_t *blkif;
- blkif_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- blkif_vdev_t vdevice = shrink->vdevice;
-
- blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("vbd_shrink attempted for non-existent blkif (%u,%u)\n",
- shrink->domid, shrink->blkif_handle);
- shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- spin_lock(&blkif->vbd_lock);
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- break;
- }
-
- if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
- {
- shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- goto out;
- }
-
- if ( unlikely(vbd->extents == NULL) )
- {
- shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
-
- /* Find the last extent. We now know that there is at least one. */
- for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
- continue;
-
- x = *px;
- *px = x->next;
- kfree(x);
-
- shrink->status = BLKIF_BE_STATUS_OKAY;
-
- out:
- spin_unlock(&blkif->vbd_lock);
-}
-
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
- blkif_t *blkif;
- vbd_t *vbd;
- rb_node_t *rb;
- blkif_extent_le_t *x, *t;
- blkif_vdev_t vdevice = destroy->vdevice;
-
- blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- PRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
- destroy->domid, destroy->blkif_handle);
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- spin_lock(&blkif->vbd_lock);
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- goto found;
- }
-
- destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- goto out;
-
- found:
- rb_erase(rb, &blkif->vbd_rb);
- x = vbd->extents;
- kfree(vbd);
-
- while ( x != NULL )
- {
- t = x->next;
- kfree(x);
- x = t;
- }
-
- out:
- spin_unlock(&blkif->vbd_lock);
-}
-
-
-void destroy_all_vbds(blkif_t *blkif)
-{
- vbd_t *vbd;
- rb_node_t *rb;
- blkif_extent_le_t *x, *t;
-
- spin_lock(&blkif->vbd_lock);
-
- while ( (rb = blkif->vbd_rb.rb_node) != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
-
- rb_erase(rb, &blkif->vbd_rb);
- x = vbd->extents;
- kfree(vbd);
-
- while ( x != NULL )
- {
- t = x->next;
- kfree(x);
- x = t;
- }
- }
-
- spin_unlock(&blkif->vbd_lock);
-}
-
-
-static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
-{
- blkif_extent_le_t *x;
-
- vbd_info->device = vbd->vdevice;
- vbd_info->info = vbd->type;
- if ( vbd->readonly )
- vbd_info->info |= VDISK_FLAG_RO;
- vbd_info->capacity = 0ULL;
- for ( x = vbd->extents; x != NULL; x = x->next )
- vbd_info->capacity += x->extent.sector_length;
-
- return 0;
-}
-
-
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
-{
- int rc = 0, nr_vbds = 0;
- rb_node_t *rb;
-
- spin_lock(&blkif->vbd_lock);
-
- if ( (rb = blkif->vbd_rb.rb_node) == NULL )
- goto out;
-
- new_subtree:
- /* STEP 1. Find least node (it'll be left-most). */
- while ( rb->rb_left != NULL )
- rb = rb->rb_left;
-
- for ( ; ; )
- {
- /* STEP 2. Dealt with left subtree. Now process current node. */
- if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds],
- rb_entry(rb, vbd_t, rb))) != 0 )
- goto out;
- if ( ++nr_vbds == max_vbds )
- goto out;
-
- /* STEP 3. Process right subtree, if any. */
- if ( rb->rb_right != NULL )
- {
- rb = rb->rb_right;
- goto new_subtree;
- }
-
- /* STEP 4. Done both subtrees. Head back through ancesstors. */
- for ( ; ; )
- {
- /* We're done when we get back to the root node. */
- if ( rb->rb_parent == NULL )
- goto out;
- /* If we are left of parent, then parent is next to process. */
- if ( rb->rb_parent->rb_left == rb )
- break;
- /* If we are right of parent, then we climb to grandparent. */
- rb = rb->rb_parent;
- }
-
- rb = rb->rb_parent;
- }
-
- out:
- spin_unlock(&blkif->vbd_lock);
- return (rc == 0) ? nr_vbds : rc;
-}
-
-
-int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
-{
- blkif_extent_le_t *x;
- vbd_t *vbd;
- rb_node_t *rb;
- blkif_sector_t sec_off;
- unsigned long nr_secs;
-
- spin_lock(&blkif->vbd_lock);
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( pseg->dev < vbd->vdevice )
- rb = rb->rb_left;
- else if ( pseg->dev > vbd->vdevice )
- rb = rb->rb_right;
- else
- goto found;
- }
-
- DPRINTK("vbd_translate; domain %u attempted to access "
- "non-existent VBD.\n", blkif->domid);
-
- spin_unlock(&blkif->vbd_lock);
- return -ENODEV;
-
- found:
-
- if ( (operation == WRITE) && vbd->readonly )
- {
- spin_unlock(&blkif->vbd_lock);
- return -EACCES;
- }
-
- /*
- * Now iterate through the list of blkif_extents, working out which should
- * be used to perform the translation.
- */
- sec_off = pseg->sector_number;
- nr_secs = pseg->nr_sects;
- for ( x = vbd->extents; x != NULL; x = x->next )
- {
- if ( sec_off < x->extent.sector_length )
- {
- pseg->dev = x->extent.device;
- pseg->sector_number = x->extent.sector_start + sec_off;
- if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
- goto overrun;
- spin_unlock(&p->vbd_lock);
- return 1;
- }
- sec_off -= x->extent.sector_length;
- }
-
- DPRINTK("vbd_translate: end of vbd.\n");
- spin_unlock(&blkif->vbd_lock);
- return -EACCES;
-
- /*
- * Here we deal with overrun onto the following extent. We don't deal with
- * overrun of more than one boundary since each request is restricted to
- * 2^9 512-byte sectors, so it should be trivial for control software to
- * ensure that extents are large enough to prevent excessive overrun.
- */
- overrun:
-
- /* Adjust length of first chunk to run to end of first extent. */
- pseg[0].nr_sects = x->extent.sector_length - sec_off;
-
- /* Set second chunk buffer and length to start where first chunk ended. */
- pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9);
- pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
-
- /* Now move to the next extent. Check it exists and is long enough! */
- if ( unlikely((x = x->next) == NULL) ||
- unlikely(x->extent.sector_length < pseg[1].nr_sects) )
- {
- DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
- spin_unlock(&p->vbd_lock);
- return -EACCES;
- }
-
- /* Store the real device and start sector for the second chunk. */
- pseg[1].dev = x->extent.device;
- pseg[1].sector_number = x->extent.sector_start;
-
- spin_unlock(&blkif->vbd_lock);
- return 2;
-}
ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/netback.c main.c
cd ${AD}/arch/xen/drivers/blkif/backend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/common.h
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/blkback.c main.c
ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/control.c
ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/interface.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/vbd.c
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-#if 0
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
static kmem_cache_t *buffer_head_cachep;
#endif
add_to_blkdev_list_tail(blkif);
blkif_put(blkif);
}
-
-#if 0 /* XXXcl tq */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
/* Push the batch through to disc. */
run_task_queue(&tq_disk);
#endif
}
}
-static int end_block_io_op(struct bio *bio, unsigned int done, int error)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
- if (done || error) /* XXXcl */
- __end_block_io_op(bio->bi_private, done);
-#if 0
+ __end_block_io_op(bh->b_private, uptodate);
kmem_cache_free(buffer_head_cachep, bh);
+}
#else
+static int end_block_io_op(struct bio *bio, unsigned int done, int error)
+{
+ if ( done || error )
+ __end_block_io_op(bio->bi_private, (done && !error));
bio_put(bio);
-#endif
return error;
}
-
+#endif
/******************************************************************************
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
-#if 0
- struct buffer_head *bh;
-#else
- struct bio *bio;
-#endif
int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
short nr_sects;
unsigned long buffer, fas;
- int i, j, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
pending_req_t *pending_req;
unsigned long remap_prot;
multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
if ( nr_sects <= 0 )
goto bad_descriptor;
- phys_seg[nr_psegs].ps_device = req->device;
+ phys_seg[nr_psegs].dev = req->device;
phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
phys_seg[nr_psegs].buffer = buffer;
phys_seg[nr_psegs].nr_sects = nr_sects;
blkif_get(blkif);
/* Now we pass each segment down to the real blkdev layer. */
-#if 0
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
for ( i = 0; i < nr_psegs; i++ )
{
+ struct buffer_head *bh;
+
bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
if ( unlikely(bh == NULL) )
{
__end_block_io_op(pending_req, 0);
- continue; /* XXXcl continue!? */
+ continue;
}
+
memset(bh, 0, sizeof (struct buffer_head));
init_waitqueue_head(&bh->b_wait);
#else
for ( i = 0; i < nr_psegs; i++ )
{
- int nr_iovecs = PFN_UP(phys_seg[i].nr_sects << 9);
- ASSERT(nr_iovecs == 1);
- bio = bio_alloc(GFP_ATOMIC, nr_iovecs);
- if ( unlikely(bio == NULL) )
- {
- __end_block_io_op(pending_req, 0);
- break;
- }
- bio->bi_bdev = phys_seg[i].ps_bdev;
- bio->bi_private = pending_req;
- bio->bi_end_io = end_block_io_op;
- bio->bi_sector = phys_seg[i].sector_number;
- bio->bi_rw = operation;
-
- bio->bi_size = 0;
-
- for ( j = 0; j < nr_iovecs; j++ )
- {
- struct bio_vec *bv = bio_iovec_idx(bio, j);
-
- bv->bv_page = virt_to_page(MMAP_VADDR(pending_idx, i));
- bv->bv_len = phys_seg[i].nr_sects << 9;
- bv->bv_offset = phys_seg[i].buffer & ~PAGE_MASK;
-
- bio->bi_size =+ bv->bv_len;
- bio->bi_vcnt++;
- }
-
- submit_bio(operation, bio);
+ struct bio *bio;
+ struct bio_vec *bv;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if ( unlikely(bio == NULL) )
+ {
+ __end_block_io_op(pending_req, 0);
+ continue;
+ }
+
+ bio->bi_bdev = phys_seg[i].bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = phys_seg[i].sector_number;
+ bio->bi_rw = operation;
+
+ bv = bio_iovec_idx(bio, 0);
+ bv->bv_page = virt_to_page(MMAP_VADDR(pending_idx, i));
+ bv->bv_len = phys_seg[i].nr_sects << 9;
+ bv->bv_offset = phys_seg[i].buffer & ~PAGE_MASK;
+
+ bio->bi_size = bv->bv_len;
+ bio->bi_vcnt++;
+
+ submit_bio(operation, bio);
}
#endif
{
int i;
- if ( !(start_info.flags & SIF_INITDOMAIN)
- && !(start_info.flags & SIF_BLK_BE_DOMAIN) )
+ if ( !(start_info.flags & SIF_INITDOMAIN) &&
+ !(start_info.flags & SIF_BLK_BE_DOMAIN) )
return 0;
blkif_interface_init();
if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
BUG();
-#if 0
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
buffer_head_cachep = kmem_cache_create(
"buffer_head_cache", sizeof(struct buffer_head),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/common.h
- */
#ifndef __BLKIF__BACKEND__COMMON_H__
#define __BLKIF__BACKEND__COMMON_H__
#define DPRINTK(_f, _a...) ((void)0)
#endif
-#define PRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+typedef struct rb_root rb_root_t;
+typedef struct rb_node rb_node_t;
+#else
+struct block_device;
+#endif
typedef struct blkif_st {
/* Unique identifier for this interface. */
BLKIF_RING_IDX blk_req_cons; /* Request consumer. */
BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */
/* VBDs attached to this interface. */
- struct rb_root vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
+ rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
spinlock_t vbd_lock; /* Protects VBD mapping. */
/* Private fields. */
enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
unsigned char readonly; /* Non-zero -> read-only */
unsigned char type; /* VDISK_TYPE_xxx */
blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */
- struct rb_node rb; /* for linking into R-B tree lookup struct */
+ rb_node_t rb; /* for linking into R-B tree lookup struct */
} vbd_t;
void vbd_create(blkif_be_vbd_create_t *create);
/* Describes a [partial] disk extent (part of a block io request) */
typedef struct {
- union {
- unsigned short dev;
- struct block_device *bdev;
- } _dev;
- unsigned short nr_sects;
- unsigned long buffer;
- blkif_sector_t sector_number;
+ unsigned short dev;
+ unsigned short nr_sects;
+ struct block_device *bdev;
+ unsigned long buffer;
+ blkif_sector_t sector_number;
} phys_seg_t;
-#define ps_device _dev.dev
-#define ps_bdev _dev.bdev
int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation);
/******************************************************************************
- * arch/xen/drivers/blkif/backend/vbd.c
+ * blkback/vbd.c
*
* Routines for managing virtual block devices (VBDs).
*
+ * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups
+ * in vbd_translate. All other lookups are implicitly protected because the
+ * only caller (the control message dispatch routine) serializes the calls.
+ *
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
*/
#include "common.h"
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static dev_t vbd_map_devnum(blkif_pdev_t);
-
-/* vbd_lock: protects updates to the rb_tree against concurrent
- * lookups in vbd_translate. All other lookups are implicitly
- * protected because the only caller (the control message dispatch
- * routine) serializes the calls. */
+#endif
void vbd_create(blkif_be_vbd_create_t *create)
{
vbd_t *vbd;
- struct rb_node **rb_p, *rb_parent = NULL;
+ rb_node_t **rb_p, *rb_parent = NULL;
blkif_t *blkif;
blkif_vdev_t vdevice = create->vdevice;
blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- PRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n",
+ DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n",
create->domid, create->blkif_handle);
create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
else
{
- PRINTK("vbd_create attempted for already existing vbd\n");
+ DPRINTK("vbd_create attempted for already existing vbd\n");
create->status = BLKIF_BE_STATUS_VBD_EXISTS;
return;
}
if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
{
- PRINTK("vbd_create: out of memory\n");
+ DPRINTK("vbd_create: out of memory\n");
create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
return;
}
blkif_t *blkif;
blkif_extent_le_t **px, *x;
vbd_t *vbd = NULL;
- struct rb_node *rb;
+ rb_node_t *rb;
blkif_vdev_t vdevice = grow->vdevice;
unsigned long sz;
-
blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- PRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n",
+ DPRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n",
grow->domid, grow->blkif_handle);
grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
{
- PRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
+ DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
return;
}
if ( grow->extent.sector_start > 0 )
{
- PRINTK("vbd_grow: device %08x start not zero!\n", grow->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- return;
+ DPRINTK("vbd_grow: dev %08x start not zero.\n", grow->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ return;
}
if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
GFP_KERNEL)) == NULL) )
{
- PRINTK("vbd_grow: out of memory\n");
+ DPRINTK("vbd_grow: out of memory\n");
grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
return;
}
x->extent.sector_length = grow->extent.sector_length;
x->next = (blkif_extent_le_t *)NULL;
-#if 01
- /* XXXcl see comments at top of open_by_devnum */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
x->bdev = open_by_devnum(vbd_map_devnum(x->extent.device),
- vbd->readonly ? FMODE_READ : FMODE_WRITE);
- if (IS_ERR(x->bdev)) {
- PRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ if ( IS_ERR(x->bdev) )
+ {
+ DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ goto out;
}
/* XXXcl maybe bd_claim? */
- if( x->bdev->bd_disk == NULL || x->bdev->bd_part == NULL )
+ if ( (x->bdev->bd_disk == NULL) || (x->bdev->bd_part == NULL) )
{
- PRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- blkdev_put(x->bdev);
- goto out;
+ DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ blkdev_put(x->bdev);
+ goto out;
}
-#endif
/* get size in sectors */
sz = x->bdev->bd_part->nr_sects;
+#else
+ if( !blk_size[MAJOR(x->extent.device)] )
+ {
+ DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ goto out;
+ }
+
+ /* convert blocks (1KB) to sectors */
+ sz = blk_size[MAJOR(x->extent.device)][MINOR(x->extent.device)] * 2;
+
+ if ( sz == 0 )
+ {
+ DPRINTK("vbd_grow: device %08x zero size!\n", x->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ goto out;
+ }
+#endif
/*
* NB. This test assumes sector_start == 0, which is always the case
for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
continue;
- *px = x;
+ *px = x; /* ATOMIC: no need for vbd_lock. */
DPRINTK("Successful grow of vdev=%04x (dom=%u)\n",
vdevice, grow->domid);
blkif_t *blkif;
blkif_extent_le_t **px, *x;
vbd_t *vbd = NULL;
- struct rb_node *rb;
+ rb_node_t *rb;
blkif_vdev_t vdevice = shrink->vdevice;
blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
{
shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- return;
+ return;
}
if ( unlikely(vbd->extents == NULL) )
{
shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- return;
+ return;
}
/* Find the last extent. We now know that there is at least one. */
continue;
x = *px;
- *px = x->next;
+ *px = x->next; /* ATOMIC: no need for vbd_lock. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
blkdev_put(x->bdev);
+#endif
kfree(x);
shrink->status = BLKIF_BE_STATUS_OKAY;
{
blkif_t *blkif;
vbd_t *vbd;
- struct rb_node *rb;
+ rb_node_t *rb;
blkif_extent_le_t *x, *t;
blkif_vdev_t vdevice = destroy->vdevice;
blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- PRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
+ DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
destroy->domid, destroy->blkif_handle);
destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
void destroy_all_vbds(blkif_t *blkif)
{
- vbd_t *vbd;
- struct rb_node *rb;
+ vbd_t *vbd;
+ rb_node_t *rb;
blkif_extent_le_t *x, *t;
spin_lock(&blkif->vbd_lock);
int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
{
- int rc = 0, nr_vbds = 0;
- struct rb_node *rb;
+ int rc = 0, nr_vbds = 0;
+ rb_node_t *rb;
spin_lock(&blkif->vbd_lock);
{
blkif_extent_le_t *x;
vbd_t *vbd;
- struct rb_node *rb;
+ rb_node_t *rb;
blkif_sector_t sec_off;
unsigned long nr_secs;
while ( rb != NULL )
{
vbd = rb_entry(rb, vbd_t, rb);
- if ( pseg->ps_device < vbd->vdevice )
+ if ( pseg->dev < vbd->vdevice )
rb = rb->rb_left;
- else if ( pseg->ps_device > vbd->vdevice )
+ else if ( pseg->dev > vbd->vdevice )
rb = rb->rb_right;
else
goto found;
{
if ( sec_off < x->extent.sector_length )
{
-#if 0
- pseg->ps_device = x->extent.device;
-#else
- pseg->ps_bdev = x->bdev;
-#endif
+ pseg->dev = x->extent.device;
+ pseg->bdev = x->bdev;
pseg->sector_number = x->extent.sector_start + sec_off;
if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
goto overrun;
}
/* Store the real device and start sector for the second chunk. */
-#if 0
- pseg[1].ps_device = x->extent.device;
-#else
- pseg->ps_bdev = x->bdev;
-#endif
+ pseg[1].dev = x->extent.device;
+ pseg[1].bdev = x->bdev;
pseg[1].sector_number = x->extent.sector_start;
spin_unlock(&blkif->vbd_lock);
return 2;
}
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+#define MAJOR_XEN(dev) ((dev)>>8)
+#define MINOR_XEN(dev) ((dev) & 0xff)
#ifndef FANCY_REMAPPING
static dev_t vbd_map_devnum(blkif_pdev_t cookie)
return MKDEV(major, minor);
}
#else
-#define XEN_IDE0_MAJOR IDE0_MAJOR
-#define XEN_IDE1_MAJOR IDE1_MAJOR
-#define XEN_IDE2_MAJOR IDE2_MAJOR
-#define XEN_IDE3_MAJOR IDE3_MAJOR
-#define XEN_IDE4_MAJOR IDE4_MAJOR
-#define XEN_IDE5_MAJOR IDE5_MAJOR
-#define XEN_IDE6_MAJOR IDE6_MAJOR
-#define XEN_IDE7_MAJOR IDE7_MAJOR
-#define XEN_IDE8_MAJOR IDE8_MAJOR
-#define XEN_IDE9_MAJOR IDE9_MAJOR
-#define XEN_SCSI_DISK0_MAJOR SCSI_DISK0_MAJOR
-#define XEN_SCSI_DISK1_MAJOR SCSI_DISK1_MAJOR
-#define XEN_SCSI_DISK2_MAJOR SCSI_DISK2_MAJOR
-#define XEN_SCSI_DISK3_MAJOR SCSI_DISK3_MAJOR
-#define XEN_SCSI_DISK4_MAJOR SCSI_DISK4_MAJOR
-#define XEN_SCSI_DISK5_MAJOR SCSI_DISK5_MAJOR
-#define XEN_SCSI_DISK6_MAJOR SCSI_DISK6_MAJOR
-#define XEN_SCSI_DISK7_MAJOR SCSI_DISK7_MAJOR
-#define XEN_SCSI_CDROM_MAJOR SCSI_CDROM_MAJOR
+#define XEN_IDE0_MAJOR IDE0_MAJOR
+#define XEN_IDE1_MAJOR IDE1_MAJOR
+#define XEN_IDE2_MAJOR IDE2_MAJOR
+#define XEN_IDE3_MAJOR IDE3_MAJOR
+#define XEN_IDE4_MAJOR IDE4_MAJOR
+#define XEN_IDE5_MAJOR IDE5_MAJOR
+#define XEN_IDE6_MAJOR IDE6_MAJOR
+#define XEN_IDE7_MAJOR IDE7_MAJOR
+#define XEN_IDE8_MAJOR IDE8_MAJOR
+#define XEN_IDE9_MAJOR IDE9_MAJOR
+#define XEN_SCSI_DISK0_MAJOR SCSI_DISK0_MAJOR
+#define XEN_SCSI_DISK1_MAJOR SCSI_DISK1_MAJOR
+#define XEN_SCSI_DISK2_MAJOR SCSI_DISK2_MAJOR
+#define XEN_SCSI_DISK3_MAJOR SCSI_DISK3_MAJOR
+#define XEN_SCSI_DISK4_MAJOR SCSI_DISK4_MAJOR
+#define XEN_SCSI_DISK5_MAJOR SCSI_DISK5_MAJOR
+#define XEN_SCSI_DISK6_MAJOR SCSI_DISK6_MAJOR
+#define XEN_SCSI_DISK7_MAJOR SCSI_DISK7_MAJOR
+#define XEN_SCSI_CDROM_MAJOR SCSI_CDROM_MAJOR
static dev_t vbd_map_devnum(blkif_pdev_t cookie)
{
case XEN_IDE9_MAJOR: new_major = IDE9_MAJOR; break;
case XEN_SCSI_DISK0_MAJOR: new_major = SCSI_DISK0_MAJOR; break;
case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR:
- new_major = SCSI_DISK1_MAJOR + major - XEN_SCSI_DISK1_MAJOR;
- break;
+ new_major = SCSI_DISK1_MAJOR + major - XEN_SCSI_DISK1_MAJOR;
+ break;
case XEN_SCSI_CDROM_MAJOR: new_major = SCSI_CDROM_MAJOR; break;
default: new_major = 0; break;
}
return MKDEV(new_major, minor);
}
#endif
+
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */